/*=======================================================================================
	TxnData.do

		Constructs working datasets from a large Personal Finance Website (PFW) 

	Author: Lorenz Kueng, June 2015
=========================================================================================*/

cap log close PFW_02
log using "$homedir/log-files/PFW_02_$date.log", text replace name(PFW_02)

* APFD: 
*   2010 - $1281 (direct deposit: 10/07/2010)
*   2011 - $1174 (10/06/2011)
*   2012 - $878  (10/04/2012)
*   2013 - $900  (10/03/2013)


*=============================================================
* Create baseline transactions data set
*=============================================================

	*-------------------------------------------------
	* Load transactions (TXNS) after dropping duplicates accounts
	*-------------------------------------------------

use "$PFWdata/Alaska/Sample Database/Final/stata_eb/txns_sample_wo_duplicates.dta", clear

	/*
		NOTE: This dataset used duplicates_filter.do, a filter that drops duplicate observations.
			  
			  It's equivalent to first loading txns_sample.dta and then running 
			  PFW_duplicates_filer.do, which generates txns_sample_wo_duplicates.dta			  
	*/

		generate _temp = round(amount,0.01) // round transaction amoutns to cents
		replace amount = _temp
		drop _temp
		
	*-------------------------------------------------
	* Add state identifier 
	*-------------------------------------------------
				
		merge m:1 userid using "$PFWdata/Alaska/Sample Database/Final/user_profile.dta", keepusing(homezipcode)
		drop if _merge==2
		drop _merge
		rename homezipcode zip
		merge m:1 zip using "$PFWdata/Alaska/Sample Database/Final/zip_state.dta"
		drop if _merge==2
		drop _merge zip

		
	*-------------------------------------------------
	* Create date variables
	*-------------------------------------------------

		generate date = mdy(month,day,year)
		format date %td
		
		generate Month = ym(year,month)
		format   Month %tm
		
		order date Month, after(userid)

		generate PFD_year = 0
		replace  PFD_year = 2009 if Month>=m(2009m10) & Month<m(2010m10)
		replace  PFD_year = 2010 if Month>=m(2010m10) & Month<m(2011m10)
		replace  PFD_year = 2011 if Month>=m(2011m10) & Month<m(2012m10)
		replace  PFD_year = 2012 if Month>=m(2012m10) & Month<m(2013m10)
		replace  PFD_year = 2013 if Month>=m(2013m10) & Month<m(2014m10)
		tabulate PFD_year
		lab var  PFD_year "12-month period from October to next September, eg 2010 = Oct'10-Sept'11"
		
		order    PFD_year, after(day)

		
		***************************************
		* Drop months with few observations
		***************************************

		tab Month
		bysort state: tab Month
		count
		drop if Month<m(2010m9) | Month>m(2014m5)  // drops 0.38% observations (17,432,499 out of 17,498,364)
		count
		

	*-------------------------------------------------
	* Clean up categoryid
	*-------------------------------------------------
	
		generate inferredcategoryid_original = inferredcategoryid
		order    inferredcategoryid_original, after(inferredcategoryid)
		
		*  Create a new category 9999 for user-defined transactions
			
		replace  inferredcategoryid = 9999 if inferredcategoryid>5005

		* Replace missing category with 999, which is easier to handle in the code below
		
		replace  inferredcategoryid =  999 if inferredcategoryid==. | inferredcategoryid==-999 


	*-------------------------------------------------
	* - Identify large durable purchases  (durable_large)
	* - Separate rent from mortgage payments  (rent, mortgage)
	* - Identify cash withdrawals and check payments  (does not create a new variable; sets inferredcategoryid=2001 if transaction description is 'ATM/cash withdrawal' or similar)
	*-------------------------------------------------

		* Identify large durable purchases from uncategorized checks and bank transactions

		do "$homedir/do-files/PFW_add_durable_column.do" 
		
		generate durable_large1  = amount * mask_durable_bydesc
		generate durable_large2  = amount * mask_durable_byamount
		generate durable_large12 = amount * mask_durable_combined
			
		drop mask_durable_*
		

		* Break up mortgage payments and rent

		do "$homedir/do-files/PFW_separate_rent_n_mortgage.do"

		generate rent     = amount * rent_flag
		generate mortgage = amount * mortgage_flag
		lab var  rent     "rental payments inferred from category 1207"
		lab var  mortgage "mortgage payments inferred from category 1207"
			
		drop rent_flag mortgage_flag


		* Identify cash withdrawals and check payments 

		do "$homedir/do-files/PFW_Cash_Check_Categorization.do" 

	
*================================
*================================
*================================
save _temp1,replace
use  _temp1,clear	
*================================
*================================
*================================

	
	*-------------------------------------------------
	* Alaska Permanent Fund Dividend payments 
	*-------------------------------------------------
		
	*** Identify APFD receipts

	cap drop description
	generate description = lower(genericXXXXdescription)
	
		** PFD received via direct deposits
		
			generate APFDid_DirectDep =	  regexm(description, "soa pfd") ///
										| regexm(description, "direct deposit soa") ///
										| regexm(description, "external deposit soa") ///
										| regexm(description, "electronic deposit soa") ///
										| regexm(description, "pfd internet deposit")
			lab var  APFDid_DirectDep "identifies receipt of an APF dividend via direct deposits"
			
			summarize amount if APFDid_DirectDep==1, detail
			tabulate inferredcategoryid if APFDid_DirectDep==1 & amount<0 
			tabulate inferredcategoryid if APFDid_DirectDep==1 & amount>0, sort 
			
			replace  APFDid_DirectDep=0 if APFDid_DirectDep==1 & amount<0 // these 3 transactions are uncategorized and hence protentially speding (cat 20)
			
			tabulate APFDid_DirectDep
			
			tabulate inferredcategoryid if APFDid_DirectDep==1, sort

		** PFD received via check deposits but not direct deposits (i.e. check deposits of the same size as the dividend in that year)
		
			generate APFDid_Check =  (Month>=m(2010m10) & Month<m(2011m10) & regexm(description, "deposit") & amount==round(1281,0.01)) /// includes: External Deposit, Deposit Mobile, ATM Deposit, Deposit Home, Deposit, Pfd Internet Deposit
									|(Month>=m(2011m10) & Month<m(2012m10) & regexm(description, "deposit") & amount==round(1174,0.01)) ///
									|(Month>=m(2012m10) & Month<m(2013m10) & regexm(description, "deposit") & amount==round( 878,0.01)) ///
									|(Month>=m(2013m10) & Month<m(2014m10)                                  & amount==round( 900,0.01)  /// need more restrictions in 2013 since $900 is an even number	
																		   & (description=="external deposit soa"|description=="deposit mobile"|description=="deposit"|description=="demand deposit soa"|description=="remote online deposit"|description=="deposit online banking") ) 
			replace  APFDid_Check = 0 if APFDid_DirectDep==1
			lab var  APFDid_Check "identifies receipt of an APF dividend via check deposits"
			tabulate APFDid_Check

			tabulate inferredcategoryid if APFDid_Check==1, sort		

			
		** Combine both PFD identifiers
		
			generate APFDid_AllPayments = APFDid_DirectDep==1 | APFDid_Check==1
			lab var  APFDid_AllPayments "identifies receipt of an APF dividend via any form of payment"
			tabulate APFDid_AllPayments
			tabulate inferredcategoryid if APFDid_AllPayments==1, sort	


		** For the conservative baseline regression, use only PF direct deposits (APFDid) at the start of October (=97% of dividend direct deposit payments in the sample)
		**  -> This makes it easier to match to calendar monthly spending

			generate Day = mdy(month,day,year)
			format   Day %td
		 
				//check distribution of payments
				generate t0 = Day if APFDid_DirectDep==1
				format   t0 %td
				tabulate t0
				count if t0!=.
				tabulate t0, sort

			generate APFDid_DirectDep_start_Oct = APFDid_DirectDep
			replace  APFDid_DirectDep_start_Oct = 0 if    /// set observations with positive PF income to zero if it's not received by direct deposit
				t0!=d(06oct2010) & ///
				t0!=d(07oct2010) & ///
				t0!=d(05oct2011) & ///
				t0!=d(06oct2011) & ///
				t0!=d(03oct2012) & ///
				t0!=d(04oct2012) & ///
				t0!=d(02oct2013) & ///
				t0!=d(03oct2013)
			lab var  APFDid_DirectDep_start_Oct "identifies receipt of an APF dividend via direct deposits at beginning of Oct"
			order    APFDid_DirectDep_start_Oct, after(APFDid_DirectDep)
			
				//check again
				drop t0
				generate t0 = Day if APFDid_DirectDep_start_Oct==1
				format   t0 %td
				tabulate t0
				count if t0!=.
				drop t0 Day


	
*================================
*================================
*================================
save _temp1b,replace
use  _temp1b,clear	
*================================
*================================
*================================
		
		
		** Clean identifiers after manual inspection
		
			ds APFDid*
			foreach var in `r(varlist)' {
				replace `var'=0 if `var'==1 & round(amount,0.01)==292.61 // seems to be a missclassified dividend since the same amount repeats every year independent of the dividend size; the misleading transaction description is "External Deposit Soa"
			}
		
		
		** Identify partial dividends and split into likely garnished vs likely charitable contributions via Pick.Click.Give
	
			* full dividend
			cap drop APFDid_FullAmount
			generate APFDid_FullAmount = APFDid_AllPayments==1 & ( ///
														round(amount,0.01)==1281 | /// 2010
														round(amount,0.01)==1174 | /// 2011
														round(amount,0.01)== 878 | /// 2012
														round(amount,0.01)== 900   /// 2013
													   )
			lab var  APFDid_FullAmount "identifies dividends that were received in full"
			
			
			* donated dividends (Note: Only partially donated dividends can be identified, since full donations do generate a transaction in the PFW data.)

			generate APFDid_Donated = 0
			local PFD = 1281 // 2010
				replace APFDid_Donated = 1 if APFDid_AllPayments==1 & PFD_year==2010 & round(amount,0.01)==round(`PFD'/2,0.01)
			forvalues i=1/32 { // ie from 25 to 800
				replace APFDid_Donated = 1 if APFDid_AllPayments==1 & PFD_year==2010 & round(amount,0.01)==round(`PFD'-`i'*25,0.01)
			} 
			local PFD = 1174 // 2011
				replace APFDid_Donated = 1 if APFDid_AllPayments==1 & PFD_year==2011 & round(amount,0.01)==round(`PFD'/2,0.01)
			forvalues i=1/32 { 
				replace APFDid_Donated = 1 if APFDid_AllPayments==1 & PFD_year==2011 & round(amount,0.01)==round(`PFD'-`i'*25,0.01)
			} 
			local PFD =  878 // 2012
				replace APFDid_Donated = 1 if APFDid_AllPayments==1 & PFD_year==2012 & round(amount,0.01)==round(`PFD'/2,0.01)
			forvalues i=1/32 { 
				replace APFDid_Donated = 1 if APFDid_AllPayments==1 & PFD_year==2012 & round(amount,0.01)==round(`PFD'-`i'*25,0.01)
			} 
			local PFD =  900 // 2013
				replace APFDid_Donated = 1 if APFDid_AllPayments==1 & PFD_year==2013 & round(amount,0.01)==round(`PFD'/2,0.01)
			forvalues i=1/32 { 
				replace APFDid_Donated = 1 if APFDid_AllPayments==1 & PFD_year==2013 & round(amount,0.01)==round(`PFD'-`i'*25,0.01)
			} 
			lab var  APFDid_Donated "identifies dividends that were partially donated to charity or a college saving plan"
	
	
			* garnished dividends (Note: Only partially garnished dividends can be identified.)
			
			generate APFDid_Garnished = 0
			replace  APFDid_Garnished = 1 if APFDid_AllPayments==1 & PFD_year==2010 & round(amount,0.01)!=round(1281,0.01) & APFDid_Donated!=1 & APFDid_FullAmount!=1
			replace  APFDid_Garnished = 1 if APFDid_AllPayments==1 & PFD_year==2011 & round(amount,0.01)!=round(1174,0.01) & APFDid_Donated!=1 & APFDid_FullAmount!=1
			replace  APFDid_Garnished = 1 if APFDid_AllPayments==1 & PFD_year==2012 & round(amount,0.01)!=round( 878,0.01) & APFDid_Donated!=1 & APFDid_FullAmount!=1
			replace  APFDid_Garnished = 1 if APFDid_AllPayments==1 & PFD_year==2013 & round(amount,0.01)!=round( 900,0.01) & APFDid_Donated!=1 & APFDid_FullAmount!=1
			lab var  APFDid_Garnished "identifies dividends that were partially garnished by governments or courts"
			
			tab1 APFDid_FullAmount APFDid_Donated APFDid_Garnished if APFDid_AllPayments==1

			foreach var in APFDid_FullAmount APFDid_Donated APFDid_Garnished APFDid_AllPayments {
				display _n(2)"`var'"
				tab1 amount if `var'==1, sort
			}

			tabulate amount if APFDid_AllPayments==1, sort

			bysort PFD_year: tabulate amount if APFDid_AllPayments==1, sort


		** Calculate size of APFD payments by userid and year
		
			generate APFD_DirectDep = amount * APFDid_DirectDep
			lab var  APFD_DirectDep "APFD payments via direct depostits (at any date)"

			generate APFD_DirectDep_start_Oct = amount * APFDid_DirectDep_start_Oct
			lab var  APFD_DirectDep_start_Oct "APFD payments via direct depostits at beginning of October"
			
			generate APFD_Check = amount * APFDid_Check
			lab var  APFD_Check "APFD payments via check deposits (at any date)"

			generate APFD_AllPayments = amount * APFDid_AllPayments
			lab var  APFD_AllPayments "APFD payments via any form of payment (at any date)"

			generate APFD_FullAmount = amount * APFDid_FullAmount
			lab var  APFD_FullAmount "APFD payments received in full (any form and date)"

			generate APFD_Donated = amount * APFDid_Donated
			lab var  APFD_Donated "APFD payments received after voluntary deductions (any form and date)"
			
			generate APFD_Garnished = amount * APFDid_Garnished
			lab var  APFD_Garnished "APFD payments received after involuntary deductions (any form and date)"
			
			// PFD variable used for main analysis
			generate APFDid  = APFDid_DirectDep_start_Oct
			lab var  APFDid "= APFDid_DirectDep_start_Oct"
			generate APFD    = APFD_DirectDep_start_Oct 
			lab var  APFD   "= APFD_DirectDep_start_Oct"
			order APFDid, before(APFDid_DirectDep)
			order APFD  , before(APFD_DirectDep)
	
			
		** Deduction amounts (voluntary and involuntary)

			generate _PFD = 0
			replace  _PFD = 1305 if Month>=m(2009m10) & Month<m(2010m10)
			replace  _PFD = 1281 if Month>=m(2010m10) & Month<m(2011m10)
			replace  _PFD = 1174 if Month>=m(2011m10) & Month<m(2012m10)
			replace  _PFD =  878 if Month>=m(2012m10) & Month<m(2013m10)
			replace  _PFD =  900 if Month>=m(2013m10) & Month<m(2014m10)
			tabulate _PFD

			cap drop APFD_deduc_voluntary
			generate APFD_deduc_voluntary   = 0
			replace  APFD_deduc_voluntary   = _PFD - APFD_Donated   if APFD_Donated  !=0
			order    APFD_deduc_voluntary, before(APFD_Donated)
			lab var  APFD_deduc_voluntary "amount voluntarily deducted from PFD (for charity or college saving plan)"
			
			cap drop APFD_deduc_involuntary
			generate APFD_deduc_involuntary = 0
			replace  APFD_deduc_involuntary = _PFD - APFD_Garnished if APFD_Garnished!=0
			order    APFD_deduc_involuntary, before(APFD_Garnished)
			lab var  APFD_deduc_involuntary "amount involuntarily deducted from PFD (garnished by governments or courts)"

			foreach var in APFD_deduc_voluntary APFD_deduc_involuntary {
				summarize `var' if APFDid_AllPayments==1
				summarize `var' if `var'!=0, detail
			}
			
			drop _PFD
	

		** Reclassify APFD payments as new income category  (otherwise there are mechanical effects on income or investments) 
		
			replace inferredcategoryid = 3101 if APFDid_DirectDep ==1 // 3101 = PFD income from direct deposit
			replace inferredcategoryid = 3102 if APFDid_Check     ==1 // 3102 = PFD income from other deposits (ie cashed checks)	
		
		
		** Flag cash outflows (ie spending) that are the same amount as the dividends in that year
		
			generate APFDid_equal_spending = 0
			replace  APFDid_equal_spending = 1 if round(amount,0.01) == -1281 & Month>=m(2010m10) & Month<m(2011m10)
			replace  APFDid_equal_spending = 1 if round(amount,0.01) == -1174 & Month>=m(2011m10) & Month<m(2012m10) 
			replace  APFDid_equal_spending = 1 if round(amount,0.01) ==  -878 & Month>=m(2012m10) & Month<m(2013m10)
			replace  APFDid_equal_spending = 1 if round(amount,0.01) ==  -900 & Month>=m(2013m10) & Month<m(2014m10) & ( description=="external deposit soa"|description=="deposit mobile"|description=="deposit"|description=="demand deposit soa"|description=="remote online deposit"|description=="deposit online banking" ) // need more restrictions in 2013 since $900 is too comon to flag it as a potentially misclassified PFD transaction

			foreach category in 3101 3102 30 3001 3003 3004 3005 3006 3007 /// incomes
								50 5001 5002 5003 5004 5005 /// investments
								21 2101 /// transfers to other accounts
								{ // non-expenditure categories don't cause a problem for the MPC estimate
				display "`category'"
				replace APFDid_equal_spending = 0 if APFDid_equal_spending==1 & inferredcategoryid==`category'
			}
				replace APFDid_equal_spending = 0 if APFDid_equal_spending==1 & (inferredcategoryid==15|inferredcategoryid==1501|inferredcategoryid==1502) // airline tickets and hotels can have exact same amount as PFD because of special PFD promotions
			
			tabulate inferredcategoryid if APFDid_equal_spending==1, sort
			
			replace  inferredcategoryid = 3103 if APFDid_equal_spending==1 // 3103 = potentially miss-classified PFD transaction		
		
*================================
*================================
*================================
save _temp2,replace
use  _temp2,clear	
*================================
*================================
*================================


	*-------------------------------------------------
	* Summarize APFD shock 
	*-------------------------------------------------
	
		/*
			Summary of results:
			- 81% of Alaskan users receive a PFD. This is similar to the aggregate statistics
			- 96% of PFDs are received via direct deposits, 4% via check deposits. This is higher than the population. The main reason is that more PFW users also use e-banking (and hence direct deposits), and non-direct deposit payments are more difficult to identify
			- 93% of PFDs (or 97% of direct deposits) are received on the same day or one day after the official disbursement date set by the APFDD. The one-day delay is due to banks needing one day to process the deposits. This is the sample used for the main analysis since these users receive the dividends at the beginning of October, and because the timing of the receipt is exogenous/predetermined, set by the APFDD. The timing of other receipts is potentially endogenous, for instance because the case had to be reviewed due to some issues with the dividend application.
		*/


	*** Count initial number of households in each state, and dividend receipts

		generate state_original = state
		order    state_original, after(state)
		tabulate state
		tabulate state if APFDid_AllPayments==1
		replace  state = "AK" if state!="AK" & APFDid_AllPayments==1 // reclassifies 60 PFD receipts from WA to AK (<1%)
		preserve
			keep userid state*
			bysort userid: keep if _n==1
			tab state
		restore
		
	*** Count fraction of Alaskans that receive a dividend
		
		preserve
			keep if state=="AK"
			bysort userid (date): egen _month_min = min(Month)
			bysort userid (date): egen _month_max = max(Month)
			format _month_min _month_max %tm		
			generate _oct = 10
			generate _min = ym(year(dofm(_month_min)),_oct)
			generate _month = month(dofm(_month_max))
			generate _year_max = year(dofm(_month_max)) 
			replace  _year_max = _year_max -1 if _month<10			
			generate _max = ym(_year_max,_oct)
			format _min _max %tm
			drop _oct _month _year_max		
			drop if Month<_min | Month>_max // restricts users to full dividend years => no bias due to entry in the middle of the dividend cycle	
			bysort userid PFD_year: egen _APFDid_AllPayments_max = max(APFDid_AllPayments)
			generate _APFDid_AllPayments = APFDid_AllPayments
			replace  _APFDid_AllPayments =. if APFDid_AllPayments==0			
			bysort userid PFD_year (date): replace _APFDid_AllPayments = 0 if _APFDid_AllPayments==. & _APFDid_AllPayments_max==0 & _n==1 // Alaskans that didn't receive a dividend
			tab _APFDid_AllPayments
		restore
		
		
	*** Tabulate distribution of payments over time
		
		foreach var in month { // month date {
			display _n(2)"`var'"
			tabulate `var' if APFDid_AllPayments ==1, sort
			tabulate `var' if APFDid_DirectDep   ==1, sort
			tabulate `var' if APFDid_Check       ==1, sort
		}		

		
	*** Summarize PFD size
		
		ds APFD_*
		foreach var in `r(varlist)' {
			display _n(2)"`var'"
			summarize `var' if `var'!=0, detail
		}
		

	*-------------------------------------------------
	* Create individual categories (completely disaggregated)
	*-------------------------------------------------

		foreach cat in 	1	2	4	5	6	7	8	9	10	11	12	13	14	15	16	17	19	20	21	30	40	50	60	70	101	102	103	104	105	201	202	204	206	207	403	404	406	501	502	503	505	506	507	508	602	603	606	609	610	611	701	704	706	707	708	801	802	901	902	903	1001	1002	1003	1102	1105	1201	1202	1203	1204	1206	1207	1208	1301	1302	1303	1304	1306	1401	1402	1403	1404	1405	1406	1501	1502	1503	1504	1601	1602	1604	1605	1606	1607	1701	1702	1703	1704	1705	1901	1902	1903	1904	1905	2001	2002	2101	2102	3001	3003	3004	3005	3006	3007	5001	5002	5003	5004	5005 ///
						999  9999 ///
						3101 3102 3103 ///
			{
				display "`cat'"
				capture n: generate cat`cat' = amount if inferredcategoryid==`cat'
				capture n: replace  cat`cat' = 0      if cat`cat'==.
		}
		summarize cat999, detail
		if `r(min)'==`r(max)'	drop cat999 // drop if cat999 is all 0


	*-------------------------------------------------
	* Create new categories to reclassify some transactions
	*-------------------------------------------------

	*** child support income  vs  alimony payments (category 603,610)
	
		generate  cat60398  = amount if inferredcategoryid==603  & amount<0 // childsupPAY
		generate  cat60399  = amount if inferredcategoryid==603  & amount>0 // childsupINC
		summarize cat603 if cat603!=0, detail
		
		generate  cat61098  = amount if inferredcategoryid==610  & amount<0 // childsupPAY
		generate  cat61099  = amount if inferredcategoryid==610  & amount>0 // childsupINC
		summarize cat610 if cat610!=0, detail


	*** transfer to vs from accounts (category 5001,5002,21,2101)
	
		generate  cat500198 = amount if inferredcategoryid==5001 & amount<0 // negative deposit (acct_trns_to)
		generate  cat500199 = amount if inferredcategoryid==5001 & amount>0 // positive deposit (acct_trns_from)
		summarize cat5001 if cat5001!=0, detail

		generate  cat500298 = amount if inferredcategoryid==5002 & amount<0 // negative withdrawal (acct_trns_to)
		generate  cat500299 = amount if inferredcategoryid==5002 & amount>0 // positive withdrawal (acct_trns_from)
		summarize cat5002 if cat5002!=0, detail

		generate  cat2198   = amount if inferredcategoryid==21   & amount<0 // transfer to   another financial account (acct_trns_to)
		generate  cat2199   = amount if inferredcategoryid==21   & amount>0 // transfer from another financial account (acct_trns_from)
		summarize cat21 if cat21!=0, detail
		
		generate  cat210198 = amount if inferredcategoryid==2101 & amount<0 // transfer to   credit card (acct_trns_to)
		generate  cat210199 = amount if inferredcategoryid==2101 & amount>0 // transfer from credit card (acct_trns_from)
		summarize cat2001 if cat2001!=0, detail
		
		generate  cat210298 = amount if inferredcategoryid==2102 & amount<0 // transfer to   cash (acct_trns_from)
		generate  cat210299 = amount if inferredcategoryid==2102 & amount>0 // transfer from cash (acct_trns_from)
		summarize cat2102 if cat2102!=0, detail


	*-------------------------------------------------
	* Save
	*-------------------------------------------------

	drop date Month description state*
	
	compress
	label data "Baseline data from Personal Finance Website"
	sort  userid year month day
save "$homedir/data/stata/PFW.dta", replace





*=============================================================
* Create monthly panel (weakly balanced)
*=============================================================
*
use "$homedir/data/stata/PFW.dta", clear

	drop amount inferredcategoryid* genericXXXXdescription accountid flag_nodesc


	*-------------------------------------------------
	* Create dates
	*-------------------------------------------------

		generate Month = ym(year,month)
		format   Month %tm

		generate quarter = quarter( mdy(month,day,year) )
		
		generate Quarter = yq(year,quarter)
		format   Quarter %tq
		
		order userid Month month Quarter quarter year day


	*-------------------------------------------------
	* Aggregate to monthly values
	*-------------------------------------------------

		* disaggregated spending categories
		ds cat*
		foreach var in `r(varlist)' {
			di "`var'"
			bysort userid Month: egen _temp = total(`var')
			replace `var' = _temp
			drop _temp
		}
		* additional variables created above
		ds APFD*
		foreach var in durable_large1 durable_large2 durable_large12 rent mortgage `r(varlist)' {
			di "`var'"
			bysort userid Month: egen _temp = total(`var')
			replace `var' = _temp
			drop _temp
		}

*================================
*================================
*================================
save _temp3,replace
use  _temp3,clear	
*================================
*================================
*================================
		
		count // 17,432,499
		bysort userid Month (day): keep if _n==1
		count // 162,634


*================================
*================================
*================================
save _temp4,replace
use  _temp4,clear	
*================================
*================================
*================================


	*-------------------------------------------------
	* Create category aggregates (in particular, expenditures)
	*-------------------------------------------------

	*** Disaggregated spending

		do "$homedir/do-files/PFW_CategoryAggregation.do"


	*** Total expenditure

		generate totexp0 = nondur_serv + durable
		lab var  totexp0  "nondur_serv + durable"
		
		generate totexp1 = totexp0 + child_supp + otherexp + debt_pay + mortgage_rent 
		lab var  totexp1  "totexp0 + child_supp + otherexp + debt_pay + mortgage_rent"

		generate totexp2 = totexp1 + cash_withdrawals	
		lab var  totexp2  "totexp1 + cash"
		
		generate totexp3 = totexp2 + uncategorized	
		lab var  totexp3  "totexp2 + uncategorized"
			
		generate totexp4 = totexp3 + user_defined	
		lab var  totexp4  "totexp3 + user defined"
			
		generate totexp5 = totexp4 + check_payments	
		lab var  totexp5  "totexp4 + check payments"


	*-------------------------------------------------
	* Create other variables (controls, etc.)
	*-------------------------------------------------
	
	*** User demographics, ZIP code, and residential status

		merge m:1 userid using "$PFWdata/Alaska/Sample Database/Final/user_profile.dta", keepusing(householdadults householdchildren homezipcode residentialstatus age incomes gender profession marriagestatus educationlevel created_date)
		drop if _merge==2
		drop _merge
	
		rename created_date date_user_profile_created
			
		lab var homezipcode 		"residential ZIP code, self-reported"
		lab var residentialstatus	"home-owmership status, self-reported"
		lab var age 				"age of account owner, self-reported"
		lab var incomes 			"income brackets, self-reported"
		lab var gender 				"gender of account owner, self-reported"
		lab var profession 			"profession of account owner, self-reported"
		lab var marriagestatus 		"marital status of account owner, self-reported"
		lab var educationlevel		"education level of account owner, self-reported"


	*** State

		generate zip = homezipcode
		merge m:1 zip using "$PFWdata/Alaska/Sample Database/Final/zip_state.dta"
		drop if _merge==2
		drop _merge zip

		generate state_original = state
		order    state_original, after(state)

		rename statefip state_fips
		generate state_fips_original = state_fips
		order    state_fips_original, after(state_fips)

		bysort userid (Month): egen _temp = max(APFDid_AllPayments)
		preserve
			duplicates drop userid state, force
			bysort state: count if _temp!=0 & _temp!=. // of 1547 users that receive PFD, 27 users reside outside AK => potentially misclassified state variable
			count if state!="AK" & _temp!=0
		restore
		tabulate state if APFDid_AllPayments==1
		replace  state_fips=2 if state!="AK" & _temp!=0
		replace  state = "AK" if state!="AK" & _temp!=0 // reclassifies 48 PFD receipts from WA to AK (<2%)
		drop _temp
		
		order state*, after(homezipcode)


	*** Family size, both self-reported and inferred from the number of APFD checks received 

		** Self-reported family size
	
		rename householdadults   adults
		rename householdchildren children
		lab var adults   "# of adults, self-reported ('householdadults')"
		lab var children "# of children, self-reported ('householdchildren')"
		
		replace children=0 if children==. & adults!=.
		
		generate family_size = adults + children
		lab var  family_size "family size, self-reported (adults+children)"
		order    family_size, before(adults)
	
	
		** Number of checks received per userid and year

		bysort userid PFD_year: egen NumbChecks = total(APFDid_AllPayments)
		local  _N=1
		while `_N'>0 {
			bysort userid (Month): replace NumbChecks = NumbChecks[_n+1] if NumbChecks==0 & NumbChecks[_n+1]!=0 & NumbChecks[_n+1]!=.
			count if userid == userid[_n+1] & NumbChecks==0 & NumbChecks[_n+1]!=0 & NumbChecks[_n+1]!=.
			local _N=`r(N)'
		}
		local  _N=1
		while `_N'>0 {
			bysort userid (Month): replace NumbChecks = NumbChecks[_n-1] if NumbChecks==0 & NumbChecks[_n-1]!=0 & NumbChecks[_n-1]!=.
			count if userid == userid[_n-1] & NumbChecks==0 & NumbChecks[_n-1]!=0 & NumbChecks[_n-1]!=.
			local _N=`r(N)'
		}
		lab var NumbChecks "number of checks received per userid (proxy of family size)"
		order   NumbChecks, before(family_size)
		tabmiss NumbChecks // no missing

		generate family_sizeImp = family_size
		tabmiss  family_sizeImp
		bysort year: egen _temp = mean(family_sizeImp)
		replace  family_sizeImp = round(_temp) if family_sizeImp==.
		drop _temp
		lab var  family_sizeImp "self-reported family, missing values imputed with annual sample mean"
		order    family_sizeImp, after(family_size)
	
	
		** Household equivalence weights
		*
		*  Use OECD equivalence scale, which assigns 
		*   - a value of 1.0 to the first household member, 
		*   - a value of 0.7 to each additional adult, and 
		*   - a value of 0.5 to each child (i.e. members 16 and younger)

		generate adultsImp = adults
		bysort year state: egen temp_adults = mean(adults)
		replace adultsImp = temp_adults if adultsImp==.
		drop temp_adults

		generate childrenImp = children
		bysort year state: egen temp_children = mean(children)
		replace childrenImp = temp_children if childrenImp==.	
		drop temp_children
	
		generate equivalence = 1.0 if family_sizeImp == 1
		replace  equivalence = 1 + (adultsImp-1)*0.7 + 0.5*childrenImp if family_sizeImp > 1
		lab var  equivalence "OECD household equivalence scale"


	*** Aggregate dividend
	
		generate PFD = 0
		replace  PFD = 1281 if Month==m(2010m10)
		replace  PFD = 1174 if Month==m(2011m10)
		replace  PFD =  878 if Month==m(2012m10)
		replace  PFD =  900 if Month==m(2013m10)
		lab var  PFD "aggregate PFD paid"

	
	*** Placebo dividend, ie potential dividend for control group in Washington
		
		generate family = NumbChecks
		replace  family = family_sizeImp if NumbChecks==0
		lab var  family "= NumbChecks, or family_sizeImp if NumbChecks=0 (missing)"
		order    family, before(family_size)
		
		generate PFD_placebo = PFD * family // family_sizeImp
		lab var  PFD_placebo "APFD * family size, ie potential PFD for all users in sample"


	*** Annualized dividend (for relative dividend size and potential ex-ante welfare loss)

		* aggregate dividend
		generate PFD_annual = 1281 if year==2010
		replace  PFD_annual = 1174 if year==2011
		replace  PFD_annual =  878 if year==2012
		replace  PFD_annual =  900 if year==2013
		replace  PFD_annual = 1884 if year==2014
		lab var  PFD_annual "size of annual PFD"

		* user-level dividend
		bysort userid year: egen APFD_annual = total(APFD)  if state=="AK"
		bysort userid:      egen _temp = total(APFD_annual) if state=="AK"
		replace APFD_annual =. if _temp==0 & state=="AK" // set Alaskans for whom I don't ever see a direct deposit at the beginning of October to missing (either fully garnished dividend or they don't use direct deposit)
		drop _temp
		replace APFD_annual = PFD_annual * family if state=="AK" & APFD_annual==0 // impute for years with 0 PFD since dividend is most likely missing due to measurement error. Note: By conditioning on APFDid_annual!=., these observations will be dropped in the baseline specification to be conservative/for robustness
		replace APFD_annual = PFD_annual * family if state!="AK" // impute for users in Washington
		lab var APFD_annual "annualized PFD payments"

		order APFD_annual PFD PFD_placebo PFD_annual, after(APFD_Garnished)


	*** Annualized APFDid's

		ds APFDid*
		foreach var in `r(varlist)' {
			if "`var'"!="APFDid_DirectDep_start_Oct" {
				bysort userid PFD_year (Month): egen `var'_annual = total(`var') if state=="AK"
				bysort userid          (Month): egen _temp = total(`var'_annual) if state=="AK"
				replace `var'_annual =. if _temp==0 & state=="AK" 
				drop _temp
				replace `var'_annual =. if `var'_annual==0 & (Month==m(2010m10)|Month==m(2011m10)|Month==m(2012m10)|Month==m(2013m10))
				local  _N=1
				while `_N'>0 { // set observations to missing for Alaskans that do receive a dividend in later years, but not at the beginning => mostl likely, these users haven't linked all bank accounts yet 
					bysort userid (Month): replace `var'_annual = `var'_annual[_n+1] if `var'_annual==0 & `var'_annual[_n+1]!=0 
					count if userid == userid[_n+1] & `var'_annual==0 & `var'_annual[_n+1]!=0 
					local _N=`r(N)'
				}
				local  _N=1
				while `_N'>0 {
					bysort userid (Month): replace `var'_annual = `var'_annual[_n-1] if `var'_annual==0 & `var'_annual[_n-1]!=0
					count if userid == userid[_n-1] & `var'_annual==0 & `var'_annual[_n-1]!=0
					local _N=`r(N)'
				}
				replace `var'_annual = 1 if state!="AK" // imputed for Washington users
			}
		}
		order APFDid*_annual, after(APFDid_equal_spending)


	*** Liquid assets (created using PFW_LiquidAssets.do)

		merge m:1 userid using "$homedir/data/stata/PFW_LiquidAssets.dta"
		drop if _merge==2
		drop _merge


	*** Impute variable "incomes"

		bysort userid Month: egen incomeM = total(income) // average monthly income
		lab var  incomeM "monthly income, based on accounts"
		
		bysort userid year: egen incomeY = mean(incomeM)
		replace  incomeY = 12*incomeY
		lab var  incomeY "annualized income, based on accounts"

		generate incomes_original = incomes
		lab var  incomes_original "income bins, original self-reported"

		replace  incomes = 0      if incomes==. & incomeY<25000
		replace  incomes = 25000  if incomes==. & incomeY<50000  &  incomeY>=25000
		replace  incomes = 50000  if incomes==. & incomeY<75000  &  incomeY>=50000
		replace  incomes = 75000  if incomes==. & incomeY<100000 &  incomeY>=75000
		replace  incomes = 100000 if incomes==. & incomeY<150000 &  incomeY>=100000
		replace  incomes = 150000 if incomes==. & incomeY<.      &  incomeY>=150000
		lab var  incomes "income bins, self-reported and missing values imputed"

		order incomes_original income incomeM incomeY, after(incomes)
		

	*** Annualize income from PFD
	
		bysort userid year: egen  _temp = total(income_from_PFD)
		replace income_from_PFD = _temp
		drop _temp


	*** CPS weights

		generate age_bins = .
		replace  age_bins =  0 if age< 18
		replace  age_bins = 18 if age<=25 & age>=18
		replace  age_bins = 26 if age<=30 & age>=26
		replace  age_bins = 31 if age<=35 & age>=31
		replace  age_bins = 36 if age<=40 & age>=36
		replace  age_bins = 41 if age<=45 & age>=41
		replace  age_bins = 46 if age<=50 & age>=46
		replace  age_bins = 51 if age<=55 & age>=51
		replace  age_bins = 56 if age<=60 & age>=56
		replace  age_bins = 61 if age<=65 & age>=61
		replace  age_bins = 66 if age<=70 & age>=66
		replace  age_bins = 71 if age> 71
		lab var  age_bins "5-year age bins"
		order    age_bins, after(age)

		generate male = gender=="M"
		lab var  male "indicator for male account holder" 
		order    male, after(gender)

		gen statefip = state_fips
		merge m:1 year age_bins incomes male statefip using "$PFWdata/cps_weights.dta", keepusing(hwtsupp)
		drop if _merge==2
		drop _merge statefip

		merge m:1 year age_bins incomes male using "$PFWdata/cps_weights_nonstate.dta", keepusing(hwtsupp) update
		drop if _merge==2
		drop _merge

		order hwtsupp, after(userid)


	*** Education

		generate edu_ =.
		replace  edu_ = 10 if educationlevel=="SH"
		replace  edu_ = 12 if educationlevel=="HS"
		replace  edu_ = 13 if educationlevel=="SC"
		replace  edu_ = 14 if educationlevel=="AD"
		replace  edu_ = 16 if educationlevel=="CG"
		replace  edu_ = 18 if educationlevel=="MA"
		replace  edu_ = 19 if educationlevel=="PD"
		replace  edu_ = 22 if educationlevel=="DO"
		bysort year: egen _temp = mean(edu_)
		replace  edu_ = round(_temp) if edu_==.
		rename   edu_ edu
		lab var  edu "education (imputed), approx. years of schooling"
		drop _temp


	*** Alaska FE

		generate Alaska = state=="AK"
		order Alaska, before(state)


*================================
*================================
*================================
save _temp5,replace
use  _temp5,clear	
*================================
*================================
*================================

	*----------------------------------
	* Add CPIu for US and Alaska
	*----------------------------------

		preserve	
			import excel "$homedir/data/APF_Data.xlsx", sheet("5. annual dividend") cellrange(Z4:AA36) clear	
			rename Z  cpiu_AK
			rename AA cpiu_US		
			generate year = 1981 + _n
			order year
			sort  year 
			tempfile cpiu
			save    `cpiu'
			bysort year: sum cpiu*
		restore			
		merge m:1 year using `cpiu'
		drop if _merge==2
		drop _merge


	*--------------------------------------------------------
	* Express nominal variables in current dollars (2014-$)
	*--------------------------------------------------------	
		
		order			userid Month Quarter PFD_year hwtsupp month quarter year day  ///
						APFDid* ///
						APFD APFD_* PFD PFD_placebo PFD_annual /// 
						nondur_serv nondur service durable child_supp otherexp mortgage_rent mortgage rent ///
						cash_withdrawals uncategorized user_defined check_payments unassigned durable_large1 durable_large2 durable_large12 spending_txn_equal_PFD ///
						totexp0 totexp1 totexp2 totexp3 totexp4 totexp5 ///
						incomes incomes_original income incomeM incomeY income_from_PFD ///
						debt_pay tax investment transfer ///
						bank_balance brokerage ret_savings liquid_asset1 liquid_asset2		
		order cat*, last			

		foreach var of varlist ///
						APFD-APFD_annual PFD PFD_placebo PFD_annual /// 
						nondur_serv nondur service durable child_supp otherexp mortgage_rent mortgage rent ///
						cash_withdrawals uncategorized user_defined check_payments unassigned durable_large1 durable_large2 durable_large12 spending_txn_equal_PFD ///
						totexp0 totexp1 totexp2 totexp3 totexp4 totexp5 ///
						incomes incomes_original income incomeM incomeY income_from_PFD ///
						debt_pay tax investment transfer ///
						bank_balance brokerage ret_savings liquid_asset1 liquid_asset2 /// 
						cat1-cat50agg ///
					{
					
			display "`var'"

			* backup of original variable
			generate `var'_nominal = `var'		
						
			* deflate		
			replace `var' = `var'/cpiu_US*236.736 if Alaska==0
			replace `var' = `var'/cpiu_AK*215.805 if Alaska==1
		}



	*-------------------------------------------------
	* Sample selection
	*-------------------------------------------------
		
		********
		* Drop users with monthly gaps in transactions -> weakly balanced (7% dropped)
		********
			
			tabmiss age // no missing age
			xtset userid Month
			tsfill // fill time-series gaps
			generate gap = age==.
			bysort userid: egen has_gaps = max(gap)
			tab month if has_gaps==1
			tab has_gaps
			drop if  has_gaps==1 // drops 7% (11,727 out of 164,946)
			drop gap has_gaps

			
		********
		* Drop users with large discrepancy between inferred and self-reported family size
		********
			
			count
			
			tab1 NumbChecks family_size
			generate diff  = NumbChecks - family_size if state=="AK"
			generate diff_abs = abs(diff)
			tab1 family_size NumbChecks diff diff_abs
			
			drop if family_size>8 & family_size!=.
			drop if NumbChecks >7 & NumbChecks !=.  
			drop if diff_abs   >4 & diff_abs   !=.   

			drop diff diff_abs
			
			count // drops 1% (1,598 out of 153,219)
			
			
				// report statistics for paper
				correlate NumbChecks family_size if state=="AK" & NumbChecks!=0 // 67%


	*-------------------------------------------------
	* Save
	*-------------------------------------------------
	
	compress
	xtset userid Month
save "$homedir/data/stata/PFW_monthly.dta", replace





*=============================================================
* Create quarterly panel (weakly balanced)
*=============================================================

use "$homedir/data/stata/PFW_monthly.dta", clear

	*-------------------------------------------------
	* Aggregate monthly variables to quarterly
	*-------------------------------------------------

	cap drop _temp
	bysort userid Quarter (Month): egen _temp = mean(incomeM)
	generate incomeQ = _temp * 3, after(incomeM)
	drop _temp
	
	foreach var of varlist ///
					APFD-APFD_Garnished PFD PFD_placebo /// 
					nondur_serv nondur service durable child_supp otherexp mortgage_rent mortgage rent ///
					cash_withdrawals uncategorized user_defined check_payments unassigned durable_large1 durable_large2 durable_large12 spending_txn_equal_PFD ///
					totexp0 totexp1 totexp2 totexp3 totexp4 totexp5 ///
					income incomeM /// monthly income
					debt_pay tax investment transfer ///
					cat1-cat50agg ///
				{
		di "`var'"

		bysort userid Quarter: egen _temp = total(`var') 
		replace `var' = _temp
		drop _temp
		
		bysort userid Quarter: egen _temp = total(`var'_nominal)
		replace `var'_nominal = _temp
		drop _temp
	}


	*-------------------------------------------------
	* Collapse to quarterly frequency
	*-------------------------------------------------
	
	count
	duplicates drop userid Quarter, force
	count

	drop Month


	*-------------------------------------------------
	* Save
	*-------------------------------------------------

	compress
	xtset userid Quarter
save "$homedir/data/stata/PFW_quarterly.dta", replace

cap log close PFW_02
